# Loading required packages
library(lattice); library(miceadds); library(sandwich); library(stargazer); library(xtable); library(udpipe)

# Loading the data
load("county_level_allocation_data.RData")

#############
# TABLE A.1 #
#############
# Table A.1: cross-tabulation of requests by subcommittee (rows) and `year`
# (columns), printed as a LaTeX table with a note row appended at the bottom.
# NOTE(review): the caption and note speak of fiscal years 2022 and 2023, while
# the column headers are whatever values `year` takes -- confirm they agree.
request_counts <- table(senators_appropriations_matched$subcommittee,
                        senators_appropriations_matched$year)

# `type` is an argument of the print method, not of xtable(), so it is supplied
# in the print() call below rather than here.
request_counts_xt <- xtable(request_counts,
                            caption = "Total Number of Requests by Committee and Fiscal Year",
                            label = "descriptive-to-subcommittee")

# The note is injected as raw LaTeX after the last body row. Its \multicolumn
# must span the row-name column plus every year column, so both the span and
# the insertion row are taken from the table instead of being hard-coded.
note_row <- paste0("\\hline \n",
                   "\\multicolumn{", ncol(request_counts) + 1, "}{l}{\\parbox[t]{1\\textwidth}{\\footnotesize \\textit{Note}: The table presents the total number of requests made by senators 
                               to each of the subcommittees of the Appropriations Committee for earmarks in Fiscal Years 2022 and 2023}}")

print(request_counts_xt,
      type = "latex",
      hline.after = c(-1, 0),
      add.to.row = list(pos = list(nrow(request_counts)), command = note_row),
      caption.placement = "top")

#############
# TABLE A.4 #
#############
# Start from a clean workspace and a fresh copy of the data
rm(list=ls())
load("county_level_allocation_data.RData")

# Keep only the rows with a non-missing mean_core_county (the observations used in the analysis)
in_analysis <- !is.na(senators_appropriations_matched$mean_core_county)
senators_appropriations_matched <- senators_appropriations_matched[in_analysis, ]

# Non-standardized county-level request variables to summarize, written as
# variable name = label printed in the table, in row order.
summary_labels <- c(
  "request_granted" = "Request Granted",
  "percent_request_fulfilled" = "Proportion of Request Fulfilled",
  "amount_percapita" = "Request Amount (Per Capita)",
  "log.total.requests.made" = "Total Amount of Senator Requests (Log)",
  "joint_request" = "Joint Request",
  "num_counties" = "Number of Counties Affected",
  "dem" = "Democrat (Majority Party Member)",
  "on_appropriations" = "Member of Appropriations Committee",
  "meddist" = "Distance from DW-NOMINATE Median",
  "mean_core_county" = "Percent Core Counties",
  "mean_swing_county" = "Percent Swing Counties",
  "logpop" = "Log Total Population in Affected Counties",
  "female" = "Senator is a Woman",
  "seniority" = "Seniority",
  "party_leader" = "Party Leader",
  "freshman" = "Freshman Senator",
  "mean_pct_urban" = "Average Percent Urban Population in Affected Counties",
  "mean_median_household_income" = "Average Median Household Income in Affected Counties",
  "any_capital" = "Capital City"
)

# Table of means, standard deviations, minima and maxima
stargazer(as.data.frame(senators_appropriations_matched[, names(summary_labels)]),
          covariate.labels = unname(summary_labels),
          title = "Summary Statistics of Dataset Analyzing County-Level Outcomes",
          digits = 2,
          summary.stat = c("mean", "sd", "min", "max"),
          notes = '\\parbox[t]{1\\textwidth}{\\footnotesize \\textit{Note}: Table presents summary statistics for the (non-standardized) variables used in our county-level analysis.}')

#############
# TABLE C.3 #
#############
# Remove all objects
rm(list=ls())
# Loading the data
load("county_level_allocation_data.RData")

# Table C.3 fits the two county-level models separately on each fiscal-year
# subsample. The specification is written out once; every other fit is derived
# from it, so the plain OLS fits handed to stargazer and the clustered fits whose
# estimates are pasted over them (by position) cannot drift apart.

# The FY 2022 models use the rows with year == 2021 and the FY 2023 models the
# rows with year == 2022.
# NOTE(review): presumably `year` is the calendar year of the request -- confirm.
requests_fy_2022 <- senators_appropriations_matched[senators_appropriations_matched$year == 2021, ]
requests_fy_2023 <- senators_appropriations_matched[senators_appropriations_matched$year == 2022, ]

# Plain OLS fits (these supply the table layout and observation counts).
# Model 1: Outcome variable is binary (was request granted or not)
reg1_fy_2022 <- lm(request_granted ~ amount_percapita_scaled + log.total.requests.made_scaled + joint_request + num_counties_scaled +
                     dem + on_appropriations + meddist_scaled + mean_core_county_scaled + mean_swing_county_scaled +
                     logpop_scaled + female + seniority_scaled + party_leader +
                     freshman + mean_pct_urban_scaled + mean_median_household_income_scaled + any_capital + factor(subcommittee),
                   data = requests_fy_2022)
# Model 2: Outcome variable is a percentage (percentage of request that was fulfilled);
# same right-hand side, only the outcome is swapped.
reg2_fy_2022 <- update(reg1_fy_2022, percent_request_fulfilled ~ .)
# FY 2023: identical specifications, refit on the other subsample.
reg1_fy_2023 <- update(reg1_fy_2022, data = requests_fy_2023)
reg2_fy_2023 <- update(reg2_fy_2022, data = requests_fy_2023)

# Same models with standard errors clustered by senator.
reg1_fe_fy_2022 <- lm.cluster(data = requests_fy_2022, formula = formula(reg1_fy_2022), cluster = "senator")
reg2_fe_fy_2022 <- lm.cluster(data = requests_fy_2022, formula = formula(reg2_fy_2022), cluster = "senator")
reg1_fe_fy_2023 <- lm.cluster(data = requests_fy_2023, formula = formula(reg1_fy_2023), cluster = "senator")
reg2_fe_fy_2023 <- lm.cluster(data = requests_fy_2023, formula = formula(reg2_fy_2023), cluster = "senator")

# Save coefficients and standard errors from the clustered models:
# column 1 of the summary table is taken as the estimate, column 2 as its standard error.
cond_fy_2022 <- data.frame(summary(reg1_fe_fy_2022))
cond2_fy_2022 <- data.frame(summary(reg2_fe_fy_2022))
cond_fy_2023 <- data.frame(summary(reg1_fe_fy_2023))
cond2_fy_2023 <- data.frame(summary(reg2_fe_fy_2023))

coefs_fy_2022 <- cond_fy_2022[, 1]
ses_fy_2022 <- cond_fy_2022[, 2]
coefs2_fy_2022 <- cond2_fy_2022[, 1]
ses2_fy_2022 <- cond2_fy_2022[, 2]

coefs_fy_2023 <- cond_fy_2023[, 1]
ses_fy_2023 <- cond_fy_2023[, 2]
coefs2_fy_2023 <- cond2_fy_2023[, 1]
ses2_fy_2023 <- cond2_fy_2023[, 2]

# Save the regression results as a table
# coef and se take the coefficients and standard errors from the clustered model
# NOTE(review): these models are fit on single-year subsamples and contain no
# factor(year) term, yet the table still prints a "Year Fixed Effects" check
# mark -- confirm whether that row belongs in this table.
# NOTE(review): the label "Log Avg. Median Household Income" is attached to
# mean_median_household_income_scaled -- confirm the "Log" wording.
stargazer(reg1_fy_2022, reg2_fy_2022, reg1_fy_2023, reg2_fy_2023,
          title = "Predictors of Approved Spending Requests Subset by Fiscal Year",
          label = "tab1_county_matched_sepyear",
          coef = list(coefs_fy_2022, coefs2_fy_2022, coefs_fy_2023, coefs2_fy_2023),
          se = list(ses_fy_2022, ses2_fy_2022, ses_fy_2023, ses2_fy_2023),
          column.labels = c("2022", "2023"), column.separate = c(2, 2),
          dep.var.labels = c("Request Granted", "Pct. Granted", "Request Granted", "Pct. Granted"),
          covariate.labels = c("Request Amount (Per Capita)", "Total Amount of Senator Requests (Log)", "Joint Request",
                               "Number of Counties Affected", "Democrat (Majority Party Member)",
                               "Member of Appropriations Committee", "Distance from DW-NOMINATE Median",
                               "Percent Core Counties", "Percent Swing Counties",
                               "Log Total Population in Affected Counties", "Senator is a Woman", "Seniority", "Party Leader",
                               "Freshman Senator", "Average Pct. Urban Pop in Affected Counties", "Log Avg. Median Household Income",
                               "Capital City"),
          # Intercept and fixed-effect dummies are hidden and reported as check marks instead
          omit = c("Constant", "subcommittee", "year"),
          add.lines = list(c("Subcommittee Fixed Effects", "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark"),
                           c("Year Fixed Effects", "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark")),
          omit.stat = c("ll", "rsq", "adj.rsq", "ser", "f"),
          report = "vc*s", star.char = c("*", "**"), star.cutoffs = c(0.10, 0.05),
          digits = 3, digits.extra = 0,
          no.space = TRUE, font.size = "footnotesize", model.numbers = FALSE, column.sep.width = "0pt",
          notes.append = FALSE, notes.label = "",
          notes="\\parbox[t]{\\textwidth}{\\footnotesize \\textit{Note}: The table shows the results from OLS regressions of a
          senator's county-level appropriation request behavior. Note that these models are limited to senators who actually made a request. Standard errors are clustered by senator. $^{**}p<0.05$, $^*p<0.10$. All continuous variables are standardized for ease of comparison.}")

##############
# TABLE C.17 #
##############
# Remove all objects
rm(list=ls())
# Loading the data
load("county_level_allocation_data.RData")

# Table C.17 uses the alternative-cutpoint core/swing measures
# (mean_core_county_alt_scaled, mean_swing_county_alt_scaled).
# The specification is written out once; the second outcome and both clustered
# fits are derived from it, so the OLS fits handed to stargazer and the clustered
# estimates pasted over them (by position) always share the same terms.

# Model 1: Outcome variable is binary (was request granted or not)
reg1_alt_cut <- lm(request_granted ~ amount_percapita_scaled + log.total.requests.made_scaled + joint_request + num_counties_scaled +
                     dem + on_appropriations + meddist_scaled + mean_core_county_alt_scaled + mean_swing_county_alt_scaled +
                     logpop_scaled + female + seniority_scaled + party_leader +
                     freshman + mean_pct_urban_scaled + mean_median_household_income_scaled + any_capital + factor(year) + factor(subcommittee),
                   data = senators_appropriations_matched)
# Model 2: Outcome variable is a percentage (percentage of request that was fulfilled);
# same right-hand side, only the outcome is swapped.
reg2_alt_cut <- update(reg1_alt_cut, percent_request_fulfilled ~ .)

# Same models with standard errors clustered by senator.
reg1_fe_alt_cut <- lm.cluster(data = senators_appropriations_matched, formula = formula(reg1_alt_cut), cluster = "senator")
reg2_fe_alt_cut <- lm.cluster(data = senators_appropriations_matched, formula = formula(reg2_alt_cut), cluster = "senator")

# Save coefficients and standard errors from the clustered models:
# column 1 of the summary table is taken as the estimate, column 2 as its standard error.
cond_alt_cut <- data.frame(summary(reg1_fe_alt_cut))
cond2_alt_cut <- data.frame(summary(reg2_fe_alt_cut))

coefs_alt_cut <- cond_alt_cut[, 1]
ses_alt_cut <- cond_alt_cut[, 2]
coefs2_alt_cut <- cond2_alt_cut[, 1]
ses2_alt_cut <- cond2_alt_cut[, 2]

# Save the regression results as a table
# coef and se take the coefficients and standard errors from the clustered model
stargazer(reg1_alt_cut, reg2_alt_cut,
          title = "Predictors of Approved Spending Requests Using Alternative Cutpoints for Core and Swing County",
          label = "tab1_county_matched_altcoreswing",
          coef = list(coefs_alt_cut, coefs2_alt_cut),
          se = list(ses_alt_cut, ses2_alt_cut),
          dep.var.labels = c("Request Granted (Binary)", "Percent of Request Granted"),
          covariate.labels = c("Request Amount (Per Capita)", "Total Amount of Senator Requests (Log)", "Joint Request",
                               "Number of Counties Affected", "Democrat (Majority Party Member)",
                               "Member of Appropriations Committee", "Distance from DW-NOMINATE Median",
                               "Percent Core Counties (57.5)", "Percent Swing Counties (47.5)",
                               "Log Total Population in Affected Counties", "Senator is a Woman", "Seniority", "Party Leader",
                               "Freshman Senator", "Average Percent Urban Population in Affected Counties",
                               "Average Median Household Income in Affected Counties",
                               "Capital City"),
          # Intercept and fixed-effect dummies are hidden and reported as check marks instead
          omit = c("Constant", "subcommittee", "year"),
          add.lines = list(c("Subcommittee Fixed Effects", "\\checkmark", "\\checkmark"),
                           c("Year Fixed Effects", "\\checkmark", "\\checkmark")),
          omit.stat = c("ll", "rsq", "adj.rsq", "ser", "f"),
          report = "vc*s", star.char = c("*", "**"), star.cutoffs = c(0.10, 0.05),
          digits = 3, digits.extra = 0,
          no.space = TRUE, font.size = "footnotesize", model.numbers = FALSE, column.sep.width = "0pt",
          notes.append = FALSE, notes.label = "",
          notes="\\parbox[t]{\\textwidth}{\\footnotesize \\textit{Note}: The table shows the results from OLS regressions of a
           senator's county-level appropriation request behavior. Note that these models are limited to senators who actually made a request. Standard errors are clustered by senator. $^{**}p<0.05$, $^*p<0.10$. All continuous variables are standardized for ease of comparison.}")

##############
# TABLE C.18 #
##############
# Remove all objects
rm(list=ls())
# Loading the data
load("county_level_allocation_data.RData")

# Table C.18 replaces the core/swing county shares with a single continuous
# regressor, mean_same_partyvote_scaled.
# The specification is written out once; the second outcome and both clustered
# fits are derived from it, so the OLS fits handed to stargazer and the clustered
# estimates pasted over them (by position) always share the same terms.

# Model 1: Outcome variable is binary (was request granted or not)
reg1_continuous_safety <- lm(request_granted ~ amount_percapita_scaled + log.total.requests.made_scaled + joint_request + num_counties_scaled +
                               dem + on_appropriations + meddist_scaled + mean_same_partyvote_scaled +
                               logpop_scaled + female + seniority_scaled + party_leader +
                               freshman + mean_pct_urban_scaled + mean_median_household_income_scaled + any_capital + factor(year) + factor(subcommittee),
                             data = senators_appropriations_matched)
# Model 2: Outcome variable is a percentage (percentage of request that was fulfilled);
# same right-hand side, only the outcome is swapped.
reg2_continuous_safety <- update(reg1_continuous_safety, percent_request_fulfilled ~ .)

# Same models with standard errors clustered by senator.
reg1_fe_continuous_safety <- lm.cluster(data = senators_appropriations_matched, formula = formula(reg1_continuous_safety), cluster = "senator")
reg2_fe_continuous_safety <- lm.cluster(data = senators_appropriations_matched, formula = formula(reg2_continuous_safety), cluster = "senator")

# Save coefficients and standard errors from the clustered models:
# column 1 of the summary table is taken as the estimate, column 2 as its standard error.
cond_continuous_safety <- data.frame(summary(reg1_fe_continuous_safety))
cond2_continuous_safety <- data.frame(summary(reg2_fe_continuous_safety))

coefs_continuous_safety <- cond_continuous_safety[, 1]
ses_continuous_safety <- cond_continuous_safety[, 2]
coefs2_continuous_safety <- cond2_continuous_safety[, 1]
ses2_continuous_safety <- cond2_continuous_safety[, 2]

# Save the regression results as a table
# coef and se take the coefficients and standard errors from the clustered model
stargazer(reg1_continuous_safety, reg2_continuous_safety,
          title = "Predictors of Approved Spending Requests Using a Continuous Measure of Electoral Safety",
          label = "tab1_county_matched_altvote",
          coef = list(coefs_continuous_safety, coefs2_continuous_safety),
          se = list(ses_continuous_safety, ses2_continuous_safety),
          dep.var.labels = c("Request Granted (Binary)", "Percent of Request Granted"),
          covariate.labels = c("Request Amount (Per Capita)", "Total Amount of Senator Requests (Log)", "Joint Request",
                               "Number of Counties Affected", "Democrat (Majority Party Member)",
                               "Member of Appropriations Committee", "Distance from DW-NOMINATE Median", "Mean Same Party Vote",
                               "Log Total Population in Affected Counties", "Senator is a Woman", "Seniority", "Party Leader",
                               "Freshman Senator", "Average Percent Urban Population in Affected Counties",
                               "Average Median Household Income in Affected Counties",
                               "Capital City"),
          # Intercept and fixed-effect dummies are hidden and reported as check marks instead
          omit = c("Constant", "subcommittee", "year"),
          add.lines = list(c("Subcommittee Fixed Effects", "\\checkmark", "\\checkmark"),
                           c("Year Fixed Effects", "\\checkmark", "\\checkmark")),
          omit.stat = c("ll", "rsq", "adj.rsq", "ser", "f"),
          report = "vc*s", star.char = c("*", "**"), star.cutoffs = c(0.10, 0.05),
          digits = 3, digits.extra = 0,
          no.space = TRUE, font.size = "footnotesize", model.numbers = FALSE, column.sep.width = "0pt",
          notes.append = FALSE, notes.label = "",
          notes="\\parbox[t]{\\textwidth}{\\footnotesize \\textit{Note}: The table shows the results from OLS regressions of a
                        senator's county-level appropriation request behavior. Note that these models are limited to senators who actually made a request. Standard errors are clustered by senator. $^{**}p<0.05$, $^*p<0.10$. All continuous variables are standardized for ease of comparison.}")


##############
# TABLE C.20 #
##############
# Remove all objects
rm(list=ls())
# Loading the data
load("county_level_allocation_data.RData")

# Table C.20 replaces the standardized median household income regressor with
# tercile_income.
# The specification is written out once; the second outcome and both clustered
# fits are derived from it, so the OLS fits handed to stargazer and the clustered
# estimates pasted over them (by position) always share the same terms.

# Model 1: Outcome variable is binary (was request granted or not)
reg1_lowerthird <- lm(request_granted ~ amount_percapita_scaled + log.total.requests.made_scaled + joint_request + num_counties_scaled +
                        dem + on_appropriations + meddist_scaled + mean_core_county_scaled + mean_swing_county_scaled +
                        logpop_scaled + female + seniority_scaled + party_leader +
                        freshman + mean_pct_urban_scaled + tercile_income + any_capital + factor(year) + factor(subcommittee),
                      data = senators_appropriations_matched)
# Model 2: Outcome variable is a percentage (percentage of request that was fulfilled);
# same right-hand side, only the outcome is swapped.
reg2_lowerthird <- update(reg1_lowerthird, percent_request_fulfilled ~ .)

# Same models with standard errors clustered by senator.
reg1_fe_lowerthird <- lm.cluster(data = senators_appropriations_matched, formula = formula(reg1_lowerthird), cluster = "senator")
reg2_fe_lowerthird <- lm.cluster(data = senators_appropriations_matched, formula = formula(reg2_lowerthird), cluster = "senator")

# Save coefficients and standard errors from the clustered models:
# column 1 of the summary table is taken as the estimate, column 2 as its standard error.
cond_lowerthird <- data.frame(summary(reg1_fe_lowerthird))
cond2_lowerthird <- data.frame(summary(reg2_fe_lowerthird))

coefs_lowerthird <- cond_lowerthird[, 1]
ses_lowerthird <- cond_lowerthird[, 2]
coefs2_lowerthird <- cond2_lowerthird[, 1]
ses2_lowerthird <- cond2_lowerthird[, 2]

# Save the regression results as a table
# coef and se take the coefficients and standard errors from the clustered model
stargazer(reg1_lowerthird, reg2_lowerthird,
          title = "Predictors of Approved Spending Requests Replacing Median Income with Tercile Measure of Median Household Income (Low to High)",
          label = "tab1_county_matched_altincome",
          coef = list(coefs_lowerthird, coefs2_lowerthird),
          se = list(ses_lowerthird, ses2_lowerthird),
          dep.var.labels = c("Request Granted (Binary)", "Percent of Request Granted"),
          covariate.labels = c("Request Amount (Per Capita)", "Total Amount of Senator Requests (Log)", "Joint Request",
                               "Number of Counties Affected", "Democrat (Majority Party Member)",
                               "Member of Appropriations Committee", "Distance from DW-NOMINATE Median",
                               "Percent Core Counties", "Percent Swing Counties",
                               "Log Total Population in Affected Counties", "Senator is a Woman", "Seniority", "Party Leader",
                               "Freshman Senator", "Average Percent Urban Population in Affected Counties",
                               "Tercile Measure of Median Household Income (Low to High)",
                               "Capital City"),
          # Intercept and fixed-effect dummies are hidden and reported as check marks instead
          omit = c("Constant", "subcommittee", "year"),
          add.lines = list(c("Subcommittee Fixed Effects", "\\checkmark", "\\checkmark"),
                           c("Year Fixed Effects", "\\checkmark", "\\checkmark")),
          omit.stat = c("ll", "rsq", "adj.rsq", "ser", "f"),
          report = "vc*s", star.char = c("*", "**"), star.cutoffs = c(0.10, 0.05),
          digits = 3, digits.extra = 0,
          no.space = TRUE, font.size = "footnotesize", model.numbers = FALSE, column.sep.width = "0pt",
          notes.append = FALSE, notes.label = "",
          notes="\\parbox[t]{\\textwidth}{\\footnotesize \\textit{Note}: The table shows the results from OLS regressions of a
          senator's county-level appropriation request behavior. Note that these models are limited to senators who actually made a request. Standard errors are clustered by senator. $^{**}p<0.05$, $^*p<0.10$. All continuous variables are standardized for ease of comparison.}")


##############
# TABLE C.23 #
##############
# Remove all objects
rm(list=ls())
# Loading the data
load("county_level_allocation_data.RData")

# Table C.23 replaces the standardized median household income regressor with
# mean_per_poverty_scaled.
# The specification is written out once; the second outcome and both clustered
# fits are derived from it, so the OLS fits handed to stargazer and the clustered
# estimates pasted over them (by position) always share the same terms.

# Model 1: Outcome variable is binary (was request granted or not)
reg1_poverty <- lm(request_granted ~ amount_percapita_scaled + log.total.requests.made_scaled + joint_request + num_counties_scaled +
                     dem + on_appropriations + meddist_scaled + mean_core_county_scaled + mean_swing_county_scaled +
                     logpop_scaled + female + seniority_scaled + party_leader +
                     freshman + mean_pct_urban_scaled + mean_per_poverty_scaled + any_capital + factor(year) + factor(subcommittee),
                   data = senators_appropriations_matched)
# Model 2: Outcome variable is a percentage (percentage of request that was fulfilled);
# same right-hand side, only the outcome is swapped.
reg2_poverty <- update(reg1_poverty, percent_request_fulfilled ~ .)

# Same models with standard errors clustered by senator.
reg1_fe_poverty <- lm.cluster(data = senators_appropriations_matched, formula = formula(reg1_poverty), cluster = "senator")
reg2_fe_poverty <- lm.cluster(data = senators_appropriations_matched, formula = formula(reg2_poverty), cluster = "senator")

# Save coefficients and standard errors from the clustered models:
# column 1 of the summary table is taken as the estimate, column 2 as its standard error.
cond_poverty <- data.frame(summary(reg1_fe_poverty))
cond2_poverty <- data.frame(summary(reg2_fe_poverty))

coefs_poverty <- cond_poverty[, 1]
ses_poverty <- cond_poverty[, 2]
coefs2_poverty <- cond2_poverty[, 1]
ses2_poverty <- cond2_poverty[, 2]

# Save the regression results as a table
# coef and se take the coefficients and standard errors from the clustered model
stargazer(reg1_poverty, reg2_poverty,
          title = "Predictors of Approved Spending Requests Replacing Median Income with Percent Below the Poverty Line",
          label = "tab1_county_matched_poverty",
          coef = list(coefs_poverty, coefs2_poverty),
          se = list(ses_poverty, ses2_poverty),
          dep.var.labels = c("Request Granted (Binary)", "Percent of Request Granted"),
          covariate.labels = c("Request Amount (Per Capita)", "Total Amount of Senator Requests (Log)", "Joint Request",
                               "Number of Counties Affected", "Democrat (Majority Party Member)",
                               "Member of Appropriations Committee", "Distance from DW-NOMINATE Median",
                               "Percent Core Counties", "Percent Swing Counties",
                               "Log Total Population in Affected Counties", "Senator is a Woman", "Seniority", "Party Leader",
                               "Freshman Senator", "Average Percent Urban Population in Affected Counties",
                               "Average Percent Below the Poverty Line in Affected Counties",
                               "Capital City"),
          # Intercept and fixed-effect dummies are hidden and reported as check marks instead
          omit = c("Constant", "subcommittee", "year"),
          add.lines = list(c("Subcommittee Fixed Effects", "\\checkmark", "\\checkmark"),
                           c("Year Fixed Effects", "\\checkmark", "\\checkmark")),
          omit.stat = c("ll", "rsq", "adj.rsq", "ser", "f"),
          report = "vc*s", star.char = c("*", "**"), star.cutoffs = c(0.10, 0.05),
          digits = 3, digits.extra = 0,
          no.space = TRUE, font.size = "footnotesize", model.numbers = FALSE, column.sep.width = "0pt",
          notes.append = FALSE, notes.label = "",
          notes="\\parbox[t]{\\textwidth}{\\footnotesize \\textit{Note}: The table shows the results from OLS regressions of a
          senator's county-level appropriation request behavior. Note that these models are limited to senators who actually made a request. Standard errors are clustered by senator. $^{**}p<0.05$, $^*p<0.10$. All continuous variables are standardized for ease of comparison.}")


##############
# TABLE C.24 #
##############
# Remove all objects
# (clears every object in the workspace so this section starts from the data file alone)
rm(list=ls())
# Loading the data
load("county_level_allocation_data.RData")

# Each outcome is fit twice with the same formula: once with lm.cluster()
# (standard errors clustered on the "senator" column) and once with lm().
# Only the lm.cluster() estimates are extracted below.
# NOTE(review): the lm() fits are presumably handed to stargazer() with the
# clustered values substituted, as in the earlier tables -- that call is not
# visible from here; confirm.

# Model 1: Outcome variable is binary (was request granted or not)
reg1_fe <- lm.cluster(request_granted ~ amount_percapita_scaled + log.total.requests.made_scaled +  joint_request + num_counties_scaled +
                        dem + on_appropriations + meddist_scaled + mean_core_county_scaled + mean_swing_county_scaled +
                        logpop_scaled + female + seniority_scaled + party_leader +
                        freshman + mean_pct_urban_scaled + mean_median_household_income_scaled + any_capital + factor(year) + factor(subcommittee),
                      cluster = "senator", data=senators_appropriations_matched)

# Unclustered fit of the same Model 1 specification
reg1 <- lm(request_granted ~ amount_percapita_scaled + log.total.requests.made_scaled + joint_request + num_counties_scaled +
             dem + on_appropriations + meddist_scaled + mean_core_county_scaled + mean_swing_county_scaled +
             logpop_scaled + female + seniority_scaled + party_leader +
             freshman + mean_pct_urban_scaled + mean_median_household_income_scaled + any_capital +  factor(year) + factor(subcommittee),data=senators_appropriations_matched)

# Model 2: Outcome variable is a percentage (percentage of request that was fulfilled)
reg2_fe <- lm.cluster(percent_request_fulfilled ~ amount_percapita_scaled + log.total.requests.made_scaled + joint_request + num_counties_scaled +
                        dem + on_appropriations + meddist_scaled + mean_core_county_scaled + mean_swing_county_scaled +
                        logpop_scaled + female + seniority_scaled + party_leader +
                        freshman + mean_pct_urban_scaled + mean_median_household_income_scaled + any_capital +  factor(year) + factor(subcommittee),
                      cluster = "senator", data=senators_appropriations_matched)

# Unclustered fit of the same Model 2 specification
reg2 <- lm(percent_request_fulfilled ~ amount_percapita_scaled + log.total.requests.made_scaled + joint_request + num_counties_scaled +
             dem + on_appropriations + meddist_scaled + mean_core_county_scaled + mean_swing_county_scaled +
             logpop_scaled + female + seniority_scaled + party_leader +
             freshman + mean_pct_urban_scaled + mean_median_household_income_scaled + any_capital +  factor(year) + factor(subcommittee),data=senators_appropriations_matched)


# Save coefficients and standard errors from the model
# (the summary table of each clustered fit is converted to a data frame)
cond  <- data.frame(summary(reg1_fe))
cond2  <- data.frame(summary(reg2_fe))

# Column 1 is taken as the estimate and column 2 as its standard error
coefs <- c(cond[,1])
ses <- c(cond[,2])
coefs2 <- c(cond2[,1])
ses2 <- c(cond2[,2])

# Save the regression results as a table
# coef and se take the coefficients and standard errors from the clustered model
stargazer(reg1, reg2,
                                 #add.lines=list(c("Subcommittee Fixed Effects","\\checkmark","\\checkmark"),
                                 #               c("Year Fixed Effects","\\checkmark","\\checkmark")),
                                 keep=c("subcommittee"),
                                 notes.append = FALSE,notes.label = "",
                                 report="vc*s",star.char=c("*","**"),star.cutoffs = c(0.10,0.05),no.space = TRUE,
                                 font.size = "footnotesize", model.numbers = FALSE,
                                 dep.var.labels = c("Request Granted (Binary)","Percent of Request Granted"),column.sep.width="0pt",
                                 covariate.labels=c("Commerce, Justice, Science, and Related",
                                                    "Energy and Water Development",
                                                    "Financial Services and General Government",
                                                    "Homeland Security",
                                                    "Interior, Environment, and Related",
                                                    "Labor, Health and Human Services, Education, and Related",
                                                    "Military Construction, Veterans Affairs, and Related",
                                                    "Transportation, Housing and Urban Development, and Related"),
                                 notes="\\parbox[t]{\\textwidth}{\\footnotesize \\textit{Note}: The table shows the subcommittee fixed effects from OLS regressions of a
                        senator's county-level appropriation request behavior. Note that these models are limited to senators who actually made a request. Standard errors are clustered by senator. $^{**}p<0.05$, $^*p<0.10$.}",
                                 label="tab1_matched_fixed",
                                 digits=3,
                                 coef=list(coefs, coefs2), 
                                 se=list(ses, ses2),
                                 digits.extra = 0,
                                 title="Subcommittee Fixed Effects From Table 3",
                                 omit.stat = c("ll","rsq","adj.rsq","ser","f"))

##############
# TABLE C.25 #
##############
# Start from an empty workspace
rm(list=ls())
# Reload the county-level request data
load("county_level_allocation_data.RData")

# Robustness sample: keep only requests whose fulfilled share is at most 1
# (i.e. drop requests funded at more than 100%). which() also leaves out rows
# where the share is missing.
rows_at_most_100 <- which(senators_appropriations_matched$percent_request_fulfilled <= 1)
scaled_total_drop_over_100 <- senators_appropriations_matched[rows_at_most_100, ]

# Ordinary OLS fits on the restricted sample. stargazer() is handed these lm
# objects; the coefficients and standard errors it prints are overridden below
# with the values taken from the lm.cluster() fits of the same specifications.

# Outcome 1: binary indicator (was the request granted or not)
ols_granted_le100 <- lm(
  request_granted ~
    amount_percapita_scaled + log.total.requests.made_scaled + joint_request +
    num_counties_scaled + dem + on_appropriations + meddist_scaled +
    mean_core_county_scaled + mean_swing_county_scaled + logpop_scaled +
    female + seniority_scaled + party_leader + freshman +
    mean_pct_urban_scaled + mean_median_household_income_scaled + any_capital +
    factor(year) + factor(subcommittee),
  data = scaled_total_drop_over_100
)

# Outcome 2: share of the request that was fulfilled
ols_fulfilled_le100 <- lm(
  percent_request_fulfilled ~
    amount_percapita_scaled + log.total.requests.made_scaled + joint_request +
    num_counties_scaled + dem + on_appropriations + meddist_scaled +
    mean_core_county_scaled + mean_swing_county_scaled + logpop_scaled +
    female + seniority_scaled + party_leader + freshman +
    mean_pct_urban_scaled + mean_median_household_income_scaled + any_capital +
    factor(year) + factor(subcommittee),
  data = scaled_total_drop_over_100
)

# Same two specifications, estimated with lm.cluster() using "senator" as the
# clustering variable
clustered_granted_le100 <- lm.cluster(
  data = scaled_total_drop_over_100,
  cluster = "senator",
  formula = request_granted ~
    amount_percapita_scaled + log.total.requests.made_scaled + joint_request +
    num_counties_scaled + dem + on_appropriations + meddist_scaled +
    mean_core_county_scaled + mean_swing_county_scaled + logpop_scaled +
    female + seniority_scaled + party_leader + freshman +
    mean_pct_urban_scaled + mean_median_household_income_scaled + any_capital +
    factor(year) + factor(subcommittee)
)

clustered_fulfilled_le100 <- lm.cluster(
  data = scaled_total_drop_over_100,
  cluster = "senator",
  formula = percent_request_fulfilled ~
    amount_percapita_scaled + log.total.requests.made_scaled + joint_request +
    num_counties_scaled + dem + on_appropriations + meddist_scaled +
    mean_core_county_scaled + mean_swing_county_scaled + logpop_scaled +
    female + seniority_scaled + party_leader + freshman +
    mean_pct_urban_scaled + mean_median_household_income_scaled + any_capital +
    factor(year) + factor(subcommittee)
)

# Pull the clustered results out of the model summaries:
# column 1 is read as the coefficients, column 2 as the standard errors
clustered_table_granted <- data.frame(summary(clustered_granted_le100))
clustered_table_fulfilled <- data.frame(summary(clustered_fulfilled_le100))

coef_granted_le100 <- clustered_table_granted[[1]]
se_granted_le100 <- clustered_table_granted[[2]]
coef_fulfilled_le100 <- clustered_table_fulfilled[[1]]
se_fulfilled_le100 <- clustered_table_fulfilled[[2]]

# Write the table: the constant and the year / subcommittee terms are omitted
# and flagged with check-mark rows instead; displayed coefficients and
# standard errors come from the clustered fits above.
stargazer(
  ols_granted_le100, ols_fulfilled_le100,
  coef = list(coef_granted_le100, coef_fulfilled_le100),
  se = list(se_granted_le100, se_fulfilled_le100),
  omit = c("Constant", "subcommittee", "year"),
  add.lines = list(
    c("Subcommittee Fixed Effects","\\checkmark","\\checkmark"),
    c("Year Fixed Effects","\\checkmark","\\checkmark")
  ),
  title = "Predictors of Approved Spending Requests (Dropping Requests Approved at Over 100 Percent)",
  label = "tab1_county_drop_over_100",
  dep.var.labels = c("Request Granted (Binary)","Percent of Request Granted"),
  covariate.labels = c(
    "Request Amount (Per Capita)", "Total Amount of Senator Requests (Log)",
    "Joint Request", "Number of Counties Affected",
    "Democrat (Majority Party Member)",
    "Member of Appropriations Committee", "Distance from DW-NOMINATE Median",
    "Percent Core Counties", "Percent Swing Counties",
    "Log Total Population in Affected Counties", "Senator is a Woman",
    "Seniority", "Party Leader",
    "Freshman Senator", "Average Percent Urban Population in Affected Counties",
    "Average Median Household Income in Affected Counties",
    "Capital City"
  ),
  report = "vc*s",
  star.char = c("*","**"),
  star.cutoffs = c(0.10,0.05),
  digits = 3,
  digits.extra = 0,
  omit.stat = c("ll","rsq","adj.rsq","ser","f"),
  model.numbers = FALSE,
  no.space = TRUE,
  font.size = "footnotesize",
  column.sep.width = "0pt",
  notes.append = FALSE,
  notes.label = "",
  notes = "\\parbox[t]{\\textwidth}{\\footnotesize \\textit{Note}: The table shows the results from OLS regressions of a
           senator's county-level appropriation request behavior. Note that these models are limited to senators who actually made a request. Standard errors are clustered by senator. $^{**}p<0.05$, $^*p<0.10$. 
           All continuous variables are standardized for ease of comparison. Requests granted at over 100 percent are dropped.}"
)

#######################
# FIGURES C.1 and C.2 #
#######################
#code from: Wijffels, J. (2023, January 4). UDPipe natural language processing - basic analytical use cases. https://cran.r-project.org/web/packages/udpipe/vignettes/udpipe-usecase-postagging-lemmatisation.html 

# Remove all objects
rm(list=ls())
# Loading the data
load("county_level_allocation_data.RData")

# Separate by if the request got funded.
# which() keeps only rows where the comparison is TRUE; plain logical indexing
# would insert all-NA rows for any request whose request_granted is missing.
funded <- senators_appropriations_matched[which(senators_appropriations_matched$request_granted == 1), ]
not_funded <- senators_appropriations_matched[which(senators_appropriations_matched$request_granted == 0), ]

# Load in the udpipe materials for the model.
# overwrite = FALSE re-uses a model file already present in the working
# directory instead of downloading it again on every run.
model <- udpipe_download_model(language = "english", overwrite = FALSE)
# Stop early with a clear message if the download did not succeed
# (isTRUE() also copes with udpipe versions that do not report this field).
if (isTRUE(model$download_failed)) {
  stop("Could not download the English udpipe model: ", model$download_message,
       call. = FALSE)
}
# Load from the path reported by the download rather than a hard-coded file
# name, so the script keeps working if the default model file name changes.
udmodel_english <- udpipe_load_model(file = model$file_model)

# Run the model for funded and not funded requests
model_funded <- udpipe_annotate(udmodel_english, funded$purpose)
model_not_funded <- udpipe_annotate(udmodel_english, not_funded$purpose)

# Turn into dataframes
x_funded <- as.data.frame(model_funded)
x_not_funded <- as.data.frame(model_not_funded)

# Summarise recurring multi-word phrases in an annotated data frame.
#
# annotated: data frame holding a `phrase_tag` column (phrasemachine tags) and
#            a `token` column.
# Returns the keywords_phrases() result restricted to phrases of more than one
# word that occur more than 3 times, with a `key` factor whose levels are the
# keywords in reverse row order (used as the bar-chart axis), and with any
# keyword containing the boilerplate "purpose of this project" removed.
summarise_phrases <- function(annotated) {
  phrase_stats <- keywords_phrases(x = annotated$phrase_tag,
                                   term = tolower(annotated$token),
                                   pattern = "(A|N)*N(P+D*(A|N)*N)*",
                                   is_regex = TRUE, detailed = FALSE)
  phrase_stats <- subset(phrase_stats, ngram > 1 & freq > 3)
  phrase_stats$key <- factor(phrase_stats$keyword,
                             levels = rev(phrase_stats$keyword))
  phrase_stats[!grepl("purpose of this project", phrase_stats$keyword), ]
}

# Using a sequence of POS tags (noun phrases / verb phrases), funded requests
x_funded$phrase_tag <- as_phrasemachine(x_funded$upos, type = "upos")
stats_funded <- summarise_phrases(x_funded)

# Make a bar chart of the first 20 phrases.
# print() is needed for lattice plots to be drawn when the script is source()d.
print(barchart(key ~ freq, data = head(stats_funded, 20), col = "black",
               main = "", xlab = "Frequency", xlim = c(0, 310)))

# Using a sequence of POS tags (noun phrases / verb phrases), not funded requests
x_not_funded$phrase_tag <- as_phrasemachine(x_not_funded$upos, type = "upos")
stats_not_funded <- summarise_phrases(x_not_funded)

# Make a bar chart of the first 20 phrases
print(barchart(key ~ freq, data = head(stats_not_funded, 20), col = "black",
               main = "", xlab = "Frequency", xlim = c(0, 1050)))



